# importing required libraries
import numpy as np
import pandas as pd
import plotly
# importing csv files as pandas dataframes
races = pd.read_csv("F1_data/races.csv")
circuits = pd.read_csv("F1_data/circuits.csv")
display(races)
display(circuits)
| raceId | year | round | circuitId | name | date | time | url | fp1_date | fp1_time | fp2_date | fp2_time | fp3_date | fp3_time | quali_date | quali_time | sprint_date | sprint_time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2009 | 1 | 1 | Australian Grand Prix | 2009-03-29 | 06:00:00 | http://en.wikipedia.org/wiki/2009_Australian_G... | \N | \N | \N | \N | \N | \N | \N | \N | \N | \N |
| 1 | 2 | 2009 | 2 | 2 | Malaysian Grand Prix | 2009-04-05 | 09:00:00 | http://en.wikipedia.org/wiki/2009_Malaysian_Gr... | \N | \N | \N | \N | \N | \N | \N | \N | \N | \N |
| 2 | 3 | 2009 | 3 | 17 | Chinese Grand Prix | 2009-04-19 | 07:00:00 | http://en.wikipedia.org/wiki/2009_Chinese_Gran... | \N | \N | \N | \N | \N | \N | \N | \N | \N | \N |
| 3 | 4 | 2009 | 4 | 3 | Bahrain Grand Prix | 2009-04-26 | 12:00:00 | http://en.wikipedia.org/wiki/2009_Bahrain_Gran... | \N | \N | \N | \N | \N | \N | \N | \N | \N | \N |
| 4 | 5 | 2009 | 5 | 4 | Spanish Grand Prix | 2009-05-10 | 12:00:00 | http://en.wikipedia.org/wiki/2009_Spanish_Gran... | \N | \N | \N | \N | \N | \N | \N | \N | \N | \N |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1074 | 1092 | 2022 | 18 | 22 | Japanese Grand Prix | 2022-10-09 | 05:00:00 | http://en.wikipedia.org/wiki/2022_Japanese_Gra... | 2022-10-07 | 04:00:00 | 2022-10-07 | 08:00:00 | 2022-10-08 | 04:00:00 | 2022-10-08 | 07:00:00 | \N | \N |
| 1075 | 1093 | 2022 | 19 | 69 | United States Grand Prix | 2022-10-23 | 19:00:00 | http://en.wikipedia.org/wiki/2022_United_State... | 2022-10-21 | 19:00:00 | 2022-10-21 | 22:00:00 | 2022-10-22 | 19:00:00 | 2022-10-22 | 22:00:00 | \N | \N |
| 1076 | 1094 | 2022 | 20 | 32 | Mexico City Grand Prix | 2022-10-30 | 20:00:00 | http://en.wikipedia.org/wiki/2022_Mexican_Gran... | 2022-10-28 | 18:00:00 | 2022-10-28 | 21:00:00 | 2022-10-29 | 17:00:00 | 2022-10-29 | 20:00:00 | \N | \N |
| 1077 | 1095 | 2022 | 21 | 18 | Brazilian Grand Prix | 2022-11-13 | 18:00:00 | http://en.wikipedia.org/wiki/2022_Brazilian_Gr... | 2022-11-11 | 15:30:00 | 2022-11-12 | 15:30:00 | \N | \N | 2022-11-11 | 19:00:00 | 2022-11-12 | 19:30:00 |
| 1078 | 1096 | 2022 | 22 | 24 | Abu Dhabi Grand Prix | 2022-11-20 | 13:00:00 | http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr... | 2022-11-18 | 09:00:00 | 2022-11-18 | 12:00:00 | 2022-11-19 | 10:00:00 | 2022-11-19 | 13:00:00 | \N | \N |
1079 rows × 18 columns
| circuitId | circuitRef | name | location | country | lat | lng | alt | url | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | albert_park | Albert Park Grand Prix Circuit | Melbourne | Australia | -37.84970 | 144.96800 | 10 | http://en.wikipedia.org/wiki/Melbourne_Grand_P... |
| 1 | 2 | sepang | Sepang International Circuit | Kuala Lumpur | Malaysia | 2.76083 | 101.73800 | 18 | http://en.wikipedia.org/wiki/Sepang_Internatio... |
| 2 | 3 | bahrain | Bahrain International Circuit | Sakhir | Bahrain | 26.03250 | 50.51060 | 7 | http://en.wikipedia.org/wiki/Bahrain_Internati... |
| 3 | 4 | catalunya | Circuit de Barcelona-Catalunya | Montmeló | Spain | 41.57000 | 2.26111 | 109 | http://en.wikipedia.org/wiki/Circuit_de_Barcel... |
| 4 | 5 | istanbul | Istanbul Park | Istanbul | Turkey | 40.95170 | 29.40500 | 130 | http://en.wikipedia.org/wiki/Istanbul_Park |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 71 | 75 | portimao | Autódromo Internacional do Algarve | Portimão | Portugal | 37.22700 | -8.62670 | 108 | http://en.wikipedia.org/wiki/Algarve_Internati... |
| 72 | 76 | mugello | Autodromo Internazionale del Mugello | Mugello | Italy | 43.99750 | 11.37190 | 255 | http://en.wikipedia.org/wiki/Mugello_Circuit |
| 73 | 77 | jeddah | Jeddah Corniche Circuit | Jeddah | Saudi Arabia | 21.63190 | 39.10440 | 15 | http://en.wikipedia.org/wiki/Jeddah_Street_Cir... |
| 74 | 78 | losail | Losail International Circuit | Al Daayen | Qatar | 25.49000 | 51.45420 | \N | http://en.wikipedia.org/wiki/Losail_Internatio... |
| 75 | 79 | miami | Miami International Autodrome | Miami | USA | 25.95810 | -80.23890 | \N | http://en.wikipedia.org/wiki/Miami_Internation... |
76 rows × 9 columns
# creating subsets of columns with the required columns
races_subset = races[['circuitId', 'name','year','date']]
circuits_subset = circuits[['circuitId', 'name','location','country']]
display(races_subset)
display(circuits_subset)
| circuitId | name | year | date | |
|---|---|---|---|---|
| 0 | 1 | Australian Grand Prix | 2009 | 2009-03-29 |
| 1 | 2 | Malaysian Grand Prix | 2009 | 2009-04-05 |
| 2 | 17 | Chinese Grand Prix | 2009 | 2009-04-19 |
| 3 | 3 | Bahrain Grand Prix | 2009 | 2009-04-26 |
| 4 | 4 | Spanish Grand Prix | 2009 | 2009-05-10 |
| ... | ... | ... | ... | ... |
| 1074 | 22 | Japanese Grand Prix | 2022 | 2022-10-09 |
| 1075 | 69 | United States Grand Prix | 2022 | 2022-10-23 |
| 1076 | 32 | Mexico City Grand Prix | 2022 | 2022-10-30 |
| 1077 | 18 | Brazilian Grand Prix | 2022 | 2022-11-13 |
| 1078 | 24 | Abu Dhabi Grand Prix | 2022 | 2022-11-20 |
1079 rows × 4 columns
| circuitId | name | location | country | |
|---|---|---|---|---|
| 0 | 1 | Albert Park Grand Prix Circuit | Melbourne | Australia |
| 1 | 2 | Sepang International Circuit | Kuala Lumpur | Malaysia |
| 2 | 3 | Bahrain International Circuit | Sakhir | Bahrain |
| 3 | 4 | Circuit de Barcelona-Catalunya | Montmeló | Spain |
| 4 | 5 | Istanbul Park | Istanbul | Turkey |
| ... | ... | ... | ... | ... |
| 71 | 75 | Autódromo Internacional do Algarve | Portimão | Portugal |
| 72 | 76 | Autodromo Internazionale del Mugello | Mugello | Italy |
| 73 | 77 | Jeddah Corniche Circuit | Jeddah | Saudi Arabia |
| 74 | 78 | Losail International Circuit | Al Daayen | Qatar |
| 75 | 79 | Miami International Autodrome | Miami | USA |
76 rows × 4 columns
# renaming column names for better understanding
circuits_subset_renamed = circuits_subset.rename(columns={'name':'Name of the stadium'})
races_subset_renamed = races_subset.rename(columns={'name':'Race Name'})
# joining the 2 datasets to create a final database for analysis
race_circuits_combined=pd.merge(races_subset_renamed,circuits_subset_renamed,on='circuitId',how='left')
display(race_circuits_combined)
| circuitId | Race Name | year | date | Name of the stadium | location | country | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | Australian Grand Prix | 2009 | 2009-03-29 | Albert Park Grand Prix Circuit | Melbourne | Australia |
| 1 | 2 | Malaysian Grand Prix | 2009 | 2009-04-05 | Sepang International Circuit | Kuala Lumpur | Malaysia |
| 2 | 17 | Chinese Grand Prix | 2009 | 2009-04-19 | Shanghai International Circuit | Shanghai | China |
| 3 | 3 | Bahrain Grand Prix | 2009 | 2009-04-26 | Bahrain International Circuit | Sakhir | Bahrain |
| 4 | 4 | Spanish Grand Prix | 2009 | 2009-05-10 | Circuit de Barcelona-Catalunya | Montmeló | Spain |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1074 | 22 | Japanese Grand Prix | 2022 | 2022-10-09 | Suzuka Circuit | Suzuka | Japan |
| 1075 | 69 | United States Grand Prix | 2022 | 2022-10-23 | Circuit of the Americas | Austin | USA |
| 1076 | 32 | Mexico City Grand Prix | 2022 | 2022-10-30 | Autódromo Hermanos Rodríguez | Mexico City | Mexico |
| 1077 | 18 | Brazilian Grand Prix | 2022 | 2022-11-13 | Autódromo José Carlos Pace | São Paulo | Brazil |
| 1078 | 24 | Abu Dhabi Grand Prix | 2022 | 2022-11-20 | Yas Marina Circuit | Abu Dhabi | UAE |
1079 rows × 7 columns
# rearranging columns for better visibility
Final_dataset = race_circuits_combined[['circuitId', 'Race Name', 'Name of the stadium', 'country', 'location','date','year']]
Final_dataset.head()
| circuitId | Race Name | Name of the stadium | country | location | date | year | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | Australian Grand Prix | Albert Park Grand Prix Circuit | Australia | Melbourne | 2009-03-29 | 2009 |
| 1 | 2 | Malaysian Grand Prix | Sepang International Circuit | Malaysia | Kuala Lumpur | 2009-04-05 | 2009 |
| 2 | 17 | Chinese Grand Prix | Shanghai International Circuit | China | Shanghai | 2009-04-19 | 2009 |
| 3 | 3 | Bahrain Grand Prix | Bahrain International Circuit | Bahrain | Sakhir | 2009-04-26 | 2009 |
| 4 | 4 | Spanish Grand Prix | Circuit de Barcelona-Catalunya | Spain | Montmeló | 2009-05-10 | 2009 |
# calculating summary 1 for the number of races across countries and various racetracks
summary1 = pd.DataFrame(Final_dataset.groupby(['country','circuitId'], as_index = False)['Race Name'].count())
summary1 = summary1.rename(columns={'Race Name':'No_of_races'})
display(summary1)
| country | circuitId | No_of_races | |
|---|---|---|---|
| 0 | Argentina | 25 | 20 |
| 1 | Australia | 1 | 25 |
| 2 | Australia | 29 | 11 |
| 3 | Austria | 57 | 1 |
| 4 | Austria | 70 | 36 |
| ... | ... | ... | ... |
| 71 | USA | 46 | 20 |
| 72 | USA | 60 | 1 |
| 73 | USA | 63 | 1 |
| 74 | USA | 69 | 10 |
| 75 | USA | 79 | 1 |
76 rows × 3 columns
# creating the final summary for the total number of races across all countries
summary2 = pd.DataFrame(summary1.groupby(['country'], as_index = False)['No_of_races'].sum())
display(summary2)
| country | No_of_races | |
|---|---|---|
| 0 | Argentina | 20 |
| 1 | Australia | 36 |
| 2 | Austria | 37 |
| 3 | Azerbaijan | 6 |
| 4 | Bahrain | 19 |
| 5 | Belgium | 67 |
| 6 | Brazil | 49 |
| 7 | Canada | 51 |
| 8 | China | 16 |
| 9 | France | 63 |
| 10 | Germany | 79 |
| 11 | Hungary | 37 |
| 12 | India | 3 |
| 13 | Italy | 104 |
| 14 | Japan | 38 |
| 15 | Korea | 4 |
| 16 | Malaysia | 19 |
| 17 | Mexico | 22 |
| 18 | Monaco | 68 |
| 19 | Morocco | 1 |
| 20 | Netherlands | 32 |
| 21 | Portugal | 18 |
| 22 | Qatar | 1 |
| 23 | Russia | 8 |
| 24 | Saudi Arabia | 2 |
| 25 | Singapore | 13 |
| 26 | South Africa | 23 |
| 27 | Spain | 59 |
| 28 | Sweden | 6 |
| 29 | Switzerland | 5 |
| 30 | Turkey | 9 |
| 31 | UAE | 14 |
| 32 | UK | 77 |
| 33 | USA | 73 |
# plotting the graph for summary 2
import plotly.express as px
fig = px.bar(summary2, x="country", y="No_of_races",
color='country',facet_row_spacing = 1,facet_col_spacing=1,title='Distriution of races hosted by countries across all seasons')
fig.update_traces(width=1)
fig.update_layout(xaxis={'categoryorder':'total ascending'})
fig.update_xaxes(tickangle= 50)
fig.show()